细胞器组装 -- 三代数据 -- Canu v2.3
安装(二选一)
mamba安装
mamba create -n pb-assembly
mamba activate pb-assembly
mamba search canu
mamba install -y canu=2.3
下载解压
curl -LRO https://github.com/marbl/canu/releases/download/v2.3/canu-2.3.Linux-amd64.tar.xz
tar -xJf canu-2.3.*.tar.xz # 会报错,缺少依赖
在 Windows 下先解压,再传到服务器上
tar -czvf canu-2.3.Linux-amd64.tar.gz canu-2.3.Linux-amd64 # 压缩
tar -zxvf canu-2.3.Linux-amd64.tar.gz # 在放压缩包的路径下解压
cd /share/nas1/yuj/software/canu-2.3.Linux-amd64/canu-2.3/bin/
chmod a+x canu
chmod -R 755 /share/nas1/yuj/software/canu-2.3.Linux-amd64/canu-2.3/
使用
2.1 组装
## v2.3
/share/nas1/yuj/software/canu-2.3.Linux-amd64/canu-2.3/bin/canu -p sample -d 00_assemble_canu genomeSize=160k useGrid=false maxThreads=8 -pacbio-hifi map_gene.fa # 输入文件 fasta、fastq 格式都可以;依赖库版本不够,用不了
/share/nas6/zhouxy/biosoft/perl/current/bin/perl /share/nas1/yuj/software/micromamba/envs/pb-assembly/bin/canu -p sample -d 00_assemble_canu genomeSize=160k corMhapSensitivity=high corMinCoverage=0 useGrid=false maxThreads=8 -pacbio-hifi map_gene.fa # hifi模式不成环
/share/nas6/zhouxy/biosoft/perl/current/bin/perl /share/nas1/yuj/software/micromamba/envs/pb-assembly/bin/canu -p sample -d 00_assemble_canu genomeSize=160k corMhapSensitivity=high corMinCoverage=0 useGrid=false maxThreads=8 -pacbio map_gene.fa # hifi数据筛选后,使用pacbio模式成环了
## 服务器版本
/share/nas6/zhangxq/biosoft/canu-master/Linux-amd64/bin/canu -p sample -d assresult genomeSize=160k useGrid=false maxThreads=8 -pacbio-raw map_gene.fa
2.2 校正测序数据
## v2.3
/share/nas1/yuj/software/canu-2.3.Linux-amd64/canu-2.3/bin/canu -correct -p correct -d correct genomeSize=200k useGrid=false -pacbio-hifi map_gene.fa
/share/nas6/zhouxy/biosoft/perl/current/bin/perl /share/nas1/yuj/software/micromamba/envs/pb-assembly/bin/canu -correct -p correct -d correct genomeSize=200k useGrid=false -pacbio map_gene.fa # 运行ok
## 服务器版本
/share/nas6/zhangxq/biosoft/canu-master/Linux-amd64/bin/canu -correct -p correct -d correct genomeSize=200k useGrid=false -pacbio-raw map_gene.fa
2.3 环化组装结果
2.3.1 运行OK
## v2.3
grep \> 00_assemble_canu/*.contigs.fasta|perl -p -e 's/\>(.+?) len=.* trim=(.+?)-(.+?)/$1\t$2\t$3/' |awk '{start = $2 ; print $1,start,$3}' | tr ' ' '\t'> trim_bed.bed
id=`ls 00_assemble_canu/*.contigs.fasta |perl -p -e 's/.*\/(.+?)\.contigs\.fasta/$1/'`
seqkit subseq --bed trim_bed.bed 00_assemble_canu/$id.contigs.fasta > 00_assemble_canu/$id.contigs.trimed.fasta
2.3.2 环化(报错)
mamba install -y python=3.12
mamba install -y circlator
circlator all --verbose 00_assemble_canu/*.contigs.fasta correct/correct.correctedReads.fasta circlator_outdir
2.4 校正组装结果
参考:《使用nextpolish对三代组装进行polish(v1.2.2版)》(简书)
## 安装
# mamba install -c bioconda nextpolish
mamba activate pb-assembly
## 二代数据可选
realpath ERR2173372_1.fastq ERR2173372_2.fastq > sgs.fofn # 一行一个
realpath unmapped.fastq > nextpolish.lgs.fofn
cp /share/nas1/yuj/software/nextpolish/nextpolish.run.cfg ./
配置运行文件:按实际情况修改 ./nextpolish.run.cfg(如基因组文件路径和上面生成的 fofn 文件路径)
nextPolish ./nextpolish.run.cfg
cp nextpolish_rundir/genome.nextpolish.fasta ./
mamba deactivate